import requests
from fake_useragent import UserAgent
from lxml import etree
import pymongo


class SeventeenKNovelSpider:
    """Scrape finished, free, 2M+-character novels from 17k.com into MongoDB.

    One MongoDB *database* is created per novel; each chapter is stored in its
    own *collection* named ``<zero-padded index>_<chapter title>``.
    """

    def __init__(self):
        # Ranking page: male category / free / finished / 2,000,000+ characters.
        self.seventeen_k_url = "https://www.17k.com/all/book/2_0_0_5_3_0_1_0_1.html"
        # A random User-Agent lowers the chance of the site blocking us.
        self.headers = {'User-Agent': UserAgent().random}
        # NOTE(review): network I/O in __init__ means merely constructing the
        # spider can raise on a bad connection; kept here so the public
        # attribute `self.html` stays available to existing callers.
        self.html = requests.get(url=self.seventeen_k_url, headers=self.headers, timeout=3)
        self.html.encoding = 'utf-8'
        self.con = pymongo.MongoClient(host='10.0.0.10', port=27017, maxPoolSize=100)

    def get_novel_url_list(self):
        """Return chapter-index URLs for the ranked novels on the listing page.

        The ranking links point at ``/book/<id>`` while the chapter index
        lives at ``/list/<id>``, hence the ``book`` -> ``list`` substitution.
        """
        page = etree.HTML(self.html.text)
        novel_url_list = []
        # Table rows 2..28 hold the ranked novels (row 1 is the header row).
        # NOTE(review): this yields 27 entries, though the original docstring
        # said "top 30" — confirm the intended count against the live page.
        for row in range(2, 29):
            href_list = page.xpath(
                '/html/body/div[4]/div[3]/div[2]/table/tbody/tr[{}]/td[3]/span/a/@href'.format(row))
            if not href_list:
                # Layout drift / missing row: skip instead of IndexError.
                continue
            novel_url_list.append('https:' + href_list[0].replace('book', 'list'))
        return novel_url_list

    def get_novel_detail(self, novel_url_list):
        """Fetch each novel's chapter index, then download and store every chapter."""
        for novel_url in novel_url_list:
            try:
                novel_html = requests.get(url=novel_url, headers=self.headers, timeout=3)
            except requests.RequestException as e:
                # Previously unguarded: one bad novel page aborted the crawl.
                print("请求超时", e)
                continue
            novel_html.encoding = 'utf-8'
            page = etree.HTML(novel_html.text)
            name_nodes = page.xpath('/html/body/div[5]/h1/text()')
            if not name_nodes:
                # Error page or layout change — nothing usable here.
                continue
            novel_name = name_nodes[0].replace('\n', '').replace('\t', '')
            chapter_links = page.xpath('//div[@class="Main List"]/dl/dd/a[@target="_blank"]')
            for num, link in enumerate(chapter_links, start=1):
                title_nodes = link.xpath('.//span/text()')
                href_nodes = link.xpath('.//@href')
                if not title_nodes or not href_nodes:
                    continue  # malformed entry — skip instead of IndexError
                novel_chapter_name = title_nodes[0].replace('\n', '').replace('\t', '')
                novel_chapter_content_url = 'https://www.17k.com' + href_nodes[0].strip()
                novel_content = self.get_novel_content(url=novel_chapter_content_url)
                self.write_monogo(novel_name, novel_chapter_name, novel_content, num=str(num))

    def get_novel_content(self, url):
        """Return the chapter body as a list of paragraph strings ([] on failure)."""
        try:
            html_con = requests.get(url=url, headers=self.headers, timeout=3)
            html_con.encoding = 'utf-8'
            page = etree.HTML(html_con.text)
            return page.xpath('/html/body/div[4]/div[2]/div[2]/div[1]/div[2]/p/text()')
        except requests.RequestException as e:
            print("请求超时", e)
            # Previously returned the string "请求超时", whose len() > 0 made
            # write_monogo store the error text as chapter content; an empty
            # list makes the writer skip the chapter instead.
            return []

    def write_monogo(self, novel_name, novel_chapter_name, novel_content, num):
        """Store one chapter (db = novel, collection = chapter) and echo it.

        ``num`` is zero-padded to three digits so collection names sort in
        chapter order: '001' < '010' < '100'. The old ``'0' + num`` produced
        '010' which sorted between '01' and '02'.

        (Method name keeps the original 'monogo' typo for caller compatibility.)
        """
        if not novel_content:
            return  # nothing scraped — don't create an empty collection
        db = self.con[novel_name]
        item = {'内容': str(novel_content)}
        collection_name = num.zfill(3) + '_' + novel_chapter_name
        print(novel_name, collection_name)
        myset = db[collection_name]
        myset.insert_one(item)
        # Echo only the document just written; re-reading the whole collection
        # after every insert was accidentally quadratic and re-printed every
        # earlier chapter each time.
        print(item)

    def run(self):
        """Entry point: collect novel URLs, scrape them, close the Mongo client."""
        novel_url_list = self.get_novel_url_list()
        try:
            self.get_novel_detail(novel_url_list)
        finally:
            # Previously leaked the connection if scraping raised mid-run.
            self.con.close()


if __name__ == '__main__':
    # Run the scraper only when executed as a script, not on import.
    SeventeenKNovelSpider().run()
